Plotting with Base Graphics

1. Histogram

library(datasets)

## Histogram of engine displacement with the default bin selection
hist(mtcars$disp)

## Request about 100 bins and fill the bars
hist(mtcars$disp, col = "Green", breaks = 100)

## Same bins, with the y-axis on the density scale instead of counts
hist(mtcars$disp, col = "Green", breaks = 100, freq = FALSE)

## Kernel density estimate of mpg, kept in a variable so it can be redrawn
d <- density(mtcars$mpg)
plot(d) # the plot method for density objects draws the estimated curve

## Filled density plot: redraw the same estimate with a title, then shade
## the area under the curve
plot(d, main = "Kernel Density of Miles Per Gallon")
polygon(d, border = "blue", col = "red")

2. Scatterplot

## Population (column 1) against income (column 2) of state.x77
plot(state.x77[, 1], state.x77[, 2])

## Same scatterplot with a main title
plot(state.x77[, 1], state.x77[, 2], main = "Population vs Income")

## Axis labels are supplied through xlab and ylab
plot(state.x77[, 1], state.x77[, 2], ylab = "Income", xlab = "Population")

## Point colour given as a colour number
plot(
  state.x77[, 1], state.x77[, 2],
  col = 2, ylab = "Income", xlab = "Population"
)

## Point colour given as a colour name
plot(
  state.x77[, 1], state.x77[, 2],
  col = "blue", ylab = "Income", xlab = "Population"
)

## pch selects the plotting symbol
plot(
  state.x77[, 1], state.x77[, 2],
  pch = 20, col = 3, ylab = "Income", xlab = "Population"
)

## cex scales the symbol size: first cex = 0.8, then cex = 1.8
for (symbol_size in c(0.8, 1.8)) {
  plot(
    state.x77[, 1], state.x77[, 2],
    cex = symbol_size, pch = 20, col = 3,
    ylab = "Income", xlab = "Population"
  )
}

3. Line graphs
## type = "l" joins the observations with line segments
plot(
  state.x77[, 1], state.x77[, 2],
  type = "l", col = 3, ylab = "Income", xlab = "Population"
)

## type = "b" draws both the points and the connecting lines
plot(
  state.x77[, 1], state.x77[, 2],
  type = "b", col = 3, ylab = "Income", xlab = "Population"
)

## lty picks the line type
plot(
  state.x77[, 1], state.x77[, 2],
  lty = 2, type = "b", col = 3, ylab = "Income", xlab = "Population"
)

## another line type
plot(
  state.x77[, 1], state.x77[, 2],
  lty = 4, type = "b", col = 3, ylab = "Income", xlab = "Population"
)

## lwd sets the line width
plot(
  state.x77[, 1], state.x77[, 2],
  lwd = 2, lty = 4, type = "b", col = 3,
  ylab = "Income", xlab = "Population"
)

## abline adds reference lines: horizontal at h, vertical at v
plot(state.x77[, 1], state.x77[, 2], col = 3,
     ylab = "Income", xlab = "Population")
abline(col = "red", h = 4000)
abline(col = "blue", v = 7000, lwd = 4, lty = 3)

## abline also accepts a fitted linear model and draws its regression line
plot(state.x77[, 1], state.x77[, 2], col = 3,
     ylab = "Income", xlab = "Population")
model <- lm(state.x77[, 2] ~ state.x77[, 1])
abline(reg = model, lty = 3, lwd = 2)

Labeling points

## Scatterplot of mileage against weight, then each point is annotated with
## the car's name taken from the row names of mtcars

plot(
  x = mtcars$wt, y = mtcars$mpg,
  col = "blue", pch = 18,
  ylab = "Mileage", xlab = "Weight",
  main = "Mileage vs. Car Weight"
)
text(
  x = mtcars$wt, y = mtcars$mpg, labels = row.names(mtcars),
  col = "red", pos = 4, cex = 0.6
)

Illustrating all type= values

## Draw one small panel per value accepted by the `type` argument.
x <- 1:5 # create some data
y <- x
opts <- c("p", "l", "o", "b", "c", "s", "S", "h")

## Set the plotting symbol, the colour and a 2 x 4 panel layout in one call.
## par() returns the previous values of the parameters it changes, so they are
## kept and restored after the loop; otherwise the red colour and the 2 x 4
## layout would carry over to every base plot drawn afterwards.
old_par <- par(pch = 22, col = "red", mfrow = c(2, 4))
for (i in seq_along(opts)) {
  heading <- paste("type=", opts[i])
  plot(x, y, type = "n", main = heading) # empty frame with the title
  lines(x, y, type = opts[i])            # data drawn in the requested type
}
par(old_par)

4. Boxplot

library(datasets)
## One box per column of the state.x77 matrix, all on the original scales
boxplot(state.x77)

## Same columns after scale(), which centres and scales each column so the
## boxes become comparable
boxplot(scale(state.x77))

## Boxplot of a single column (population); the title is added afterwards
boxplot(state.x77[, 1], ylab = "Population")
title("Boxplot of State Populations")

## Boxplot of MPG by number of cylinders, using the formula interface
## (the title previously read "Milage"; spelling corrected)
boxplot(mpg ~ cyl, data = mtcars, main = "Car Mileage Data",
        xlab = "Number of Cylinders", ylab = "Miles Per Gallon")


5. Multiple plots on screen

## mfrow: a 2 x 1 layout whose panels are filled row by row
par(mfrow = c(2, 1))
plot(state.x77[, 1], state.x77[, 2], col = 3,
     ylab = "Income", xlab = "Population")

plot(state.x77[, 1], ylab = "Population")

## mfcol: a 1 x 2 layout whose panels are filled column by column
par(mfcol = c(1, 2))
plot(state.x77[, 1], state.x77[, 2], col = 3,
     ylab = "Income", xlab = "Population")

plot(state.x77[, 1], ylab = "Population")

plot(state.x77[, 2], ylab = "Income")

## Back to a single plot per page
par(mfcol = c(1, 1))


6. Matrix plots using matplot

## Quarterly series shipped with the datasets package, printed for reference
JohnsonJohnson
##       Qtr1  Qtr2  Qtr3  Qtr4
## 1960  0.71  0.63  0.85  0.44
## 1961  0.61  0.69  0.92  0.55
## 1962  0.72  0.77  0.92  0.60
## 1963  0.83  0.80  1.00  0.77
## 1964  0.92  1.00  1.24  1.00
## 1965  1.16  1.30  1.45  1.25
## 1966  1.26  1.38  1.86  1.56
## 1967  1.53  1.59  1.83  1.86
## 1968  1.53  2.07  2.34  2.25
## 1969  2.16  2.43  2.70  2.25
## 1970  2.79  3.42  3.69  3.60
## 1971  3.60  4.32  4.32  4.05
## 1972  4.86  5.04  5.04  4.41
## 1973  5.58  5.85  6.57  5.31
## 1974  6.03  6.39  6.93  5.85
## 1975  6.93  7.74  7.83  6.12
## 1976  7.74  8.91  8.28  6.84
## 1977  9.54 10.26  9.54  8.73
## 1978 11.88 12.06 12.15  8.91
## 1979 14.04 12.96 14.85  9.99
## 1980 16.20 14.67 16.02 11.61

## It is a time series object, not a matrix
class(JohnsonJohnson)
## [1] "ts"

## Lay the values out with one row per year and one column per quarter:
## fill a 4-row matrix column by column (one year per column), then transpose
m <- t(matrix(JohnsonJohnson, nrow = 4))
m
##        [,1]  [,2]  [,3]  [,4]
##  [1,]  0.71  0.63  0.85  0.44
##  [2,]  0.61  0.69  0.92  0.55
##  [3,]  0.72  0.77  0.92  0.60
##  [4,]  0.83  0.80  1.00  0.77
##  [5,]  0.92  1.00  1.24  1.00
##  [6,]  1.16  1.30  1.45  1.25
##  [7,]  1.26  1.38  1.86  1.56
##  [8,]  1.53  1.59  1.83  1.86
##  [9,]  1.53  2.07  2.34  2.25
## [10,]  2.16  2.43  2.70  2.25
## [11,]  2.79  3.42  3.69  3.60
## [12,]  3.60  4.32  4.32  4.05
## [13,]  4.86  5.04  5.04  4.41
## [14,]  5.58  5.85  6.57  5.31
## [15,]  6.03  6.39  6.93  5.85
## [16,]  6.93  7.74  7.83  6.12
## [17,]  7.74  8.91  8.28  6.84
## [18,]  9.54 10.26  9.54  8.73
## [19,] 11.88 12.06 12.15  8.91
## [20,] 14.04 12.96 14.85  9.99
## [21,] 16.20 14.67 16.02 11.61

## matplot draws every column of the matrix as its own line
matplot(m, type = "l")

7. Q-Q Plots

# Q-Q plot of a normal sample against the normal distribution.
# No seed is set in this snippet, so the samples (and plots) differ per run.
x1 <- rnorm(100)

qqnorm(x1)
qqline(x1)

# Comparing 2 distributions side by side.
# par() returns the previous mfrow value; it is restored at the end so the
# 1 x 2 layout does not leak into base plots drawn after this section.
old_par <- par(mfrow = c(1, 2))

x <- rt(100, df = 3)

# normal fit: sample quantiles against normal quantiles
qqnorm(x)
qqline(x)

# t(3 df) fit: sample quantiles against a large t(3) sample, with the
# line y = x added as the reference
qqplot(rt(1000, df = 3), x, main = "t(3) Q-Q Plot",
       ylab = "Sample Quantiles")
abline(0, 1)

par(old_par)

Interpreting QQ Plots: http://stats.stackexchange.com/a/101290/21450


Key Parameters

Many base plotting functions share a set of parameters. Here are a few key ones:

  • pch: the plotting symbol (default is open circle)
  • lty: the line type (default is solid line)
  • lwd: the line width, given as a positive number that scales the default width (default is 1)
  • col: plotting color
  • main: main plot title
  • xlab: x-axis label
  • ylab: y-axis label

References:

  1. pch: 0 to 25 (refer: http://www.endmemo.com/program/R/pchsymbols.php)
  2. lty: 1 to 6
  3. lwd: 1 to 8

Intro to ggplot2

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
## Scatterplot of mpg against weight; labs() sets the title and axis labels
ggplot(mtcars, mapping = aes(x = wt, y = mpg)) +
  geom_point() +
  labs(x = "Weight", y = "Miles Per Gallon", title = "Automobile Data")


Common options in geom functions

  • color
  • fill
  • alpha
    • 0: transparent
    • 1: opaque
  • linetype
    • 1 to 6
  • size
  • shape
  • binwidth
  • width
  • position
    • dodge
    • stack
    • fill
    • jitter

Colors

## cyl mapped to colour as a plain numeric column
ggplot(mtcars, mapping = aes(x = wt, y = mpg, color = cyl)) +
  geom_point(size = 5) +
  labs(x = "Weight", y = "Miles Per Gallon", title = "Automobile Data")

## cyl converted to a factor before being mapped to colour
ggplot(mtcars, mapping = aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point() +
  labs(x = "Weight", y = "Miles Per Gallon", title = "Automobile Data")

## a constant colour passed to the geom itself, alongside the mapped colour
ggplot(mtcars, mapping = aes(x = wt, y = mpg, color = factor(cyl))) +
  geom_point(color = "red") +
  labs(x = "Weight", y = "Miles Per Gallon", title = "Automobile Data")


Exercise

Use the diamonds dataset to reproduce the plot shown in this image


Histograms

library(lattice)

## Histogram of singer heights with the default binning
ggplot(data = singer, mapping = aes(x = height)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## Same histogram with an explicit bin width of 2
ggplot(data = singer, mapping = aes(x = height)) +
  geom_histogram(binwidth = 2)


Box plots

## One box of heights per voice part
ggplot(data = singer, mapping = aes(x = voice.part, y = height)) +
  geom_boxplot()


Bar plots

## Load the Salaries data set. Since car 3.0 the example data sets live in
## the carData package, where data(Salaries, package = "car") only warns that
## the data set was not found; fall back to "car" for older installations.
salaries_pkg <- if (requireNamespace("carData", quietly = TRUE)) {
  "carData"
} else {
  "car"
}
data(Salaries, package = salaries_pkg)

## position = "stack": bars for each sex are stacked on top of each other
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "stack") +
  labs(title = 'position="stack"')

## position = "dodge": bars for each sex are placed side by side
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "dodge") +
  labs(title = 'position="dodge"')

## position = "fill": stacked bars rescaled to the same height (proportions)
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "fill") +
  labs(title = 'position="fill"')

## same proportional bars, with the axes flipped by coord_flip()
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "fill") +
  labs(title = 'position="fill"') +
  coord_flip()

## dodged bars filled from a grey palette instead of the default colours
ggplot(Salaries, aes(x = rank, fill = sex)) +
  geom_bar(position = "dodge") +
  labs(title = 'position="dodge"') +
  scale_fill_grey(start = 0, end = 1)


Scatter plots

## Shared starting point for the rank/salary plots: salary against rank,
## coloured by sex
salary_by_rank <- ggplot(
  data = Salaries,
  mapping = aes(x = rank, y = salary, color = sex)
)

## plain points
salary_by_rank +
  geom_point()

## points drawn with the "jitter" position adjustment
salary_by_rank +
  geom_point(size = 3, position = "jitter")

## the same idea through geom_jitter()
salary_by_rank +
  geom_jitter(size = 3)

## additionally map sex to the point shape
salary_by_rank +
  geom_jitter(mapping = aes(shape = sex), size = 3)

## salary against years of service with a linear-model smoother on top
ggplot(data = Salaries, mapping = aes(x = yrs.service, y = salary)) +
  geom_jitter(size = 3) +
  geom_smooth(method = lm)


Line plots

## The quarterly series, printed for reference
JohnsonJohnson
##       Qtr1  Qtr2  Qtr3  Qtr4
## 1960  0.71  0.63  0.85  0.44
## 1961  0.61  0.69  0.92  0.55
## 1962  0.72  0.77  0.92  0.60
## 1963  0.83  0.80  1.00  0.77
## 1964  0.92  1.00  1.24  1.00
## 1965  1.16  1.30  1.45  1.25
## 1966  1.26  1.38  1.86  1.56
## 1967  1.53  1.59  1.83  1.86
## 1968  1.53  2.07  2.34  2.25
## 1969  2.16  2.43  2.70  2.25
## 1970  2.79  3.42  3.69  3.60
## 1971  3.60  4.32  4.32  4.05
## 1972  4.86  5.04  5.04  4.41
## 1973  5.58  5.85  6.57  5.31
## 1974  6.03  6.39  6.93  5.85
## 1975  6.93  7.74  7.83  6.12
## 1976  7.74  8.91  8.28  6.84
## 1977  9.54 10.26  9.54  8.73
## 1978 11.88 12.06 12.15  8.91
## 1979 14.04 12.96 14.85  9.99
## 1980 16.20 14.67 16.02 11.61

## Build a data frame with one row per year: a Year column followed by one
## column per quarter (Q1..Q4). The values are laid out row by row, so each
## row of the matrix holds the four quarters of one year.
jj <- data.frame(
  cbind(
    Year = 1960:1980,
    matrix(
      JohnsonJohnson,
      ncol = 4, byrow = TRUE,
      dimnames = list(NULL, c("Q1", "Q2", "Q3", "Q4"))
    )
  )
)

## Line plot of the first-quarter values over the years
ggplot(data = jj, mapping = aes(x = Year, y = Q1)) +
  geom_line()

### Reshaping example: melt() turns the quarter columns into two columns,
### `variable` (the quarter name) and `value`, keeping Year as identifier,
### so that each quarter can be drawn as its own coloured line
library(reshape2)

melt_jj <- melt(data = jj, id.vars = "Year")

ggplot(data = melt_jj, mapping = aes(x = Year, y = value, color = variable)) +
  geom_line()


Grouping

## Load the Salaries data set. Since car 3.0 the example data sets live in
## the carData package, where data(Salaries, package = "car") only warns that
## the data set was not found; fall back to "car" for older installations.
salaries_pkg <- if (requireNamespace("carData", quietly = TRUE)) {
  "carData"
} else {
  "car"
}
data(Salaries, package = salaries_pkg)
library(ggplot2)

## density of salary over all observations
ggplot(Salaries, aes(x = salary)) +
  geom_density(alpha = 0.3)

## one semi-transparent density per rank, distinguished by fill colour
ggplot(Salaries, aes(x = salary, fill = rank)) +
  geom_density(alpha = 0.3)

## two grouping variables at once: sex as point shape, rank as colour
ggplot(Salaries, aes(x = yrs.since.phd, y = salary, shape = sex,
                     color = rank)) +
  geom_point(size = 3)


Faceting

Using facet_wrap

## Load the singer data set from lattice and attach ggplot2
data(singer, package = "lattice")
library(ggplot2)

## One height histogram per voice part, with the panels wrapped into 4 rows
ggplot(singer, mapping = aes(x = height)) +
  geom_histogram() +
  facet_wrap(~voice.part, nrow = 4)

Using facet_grid

## Copy of mtcars in which three columns are turned into factors:
## am and vs get descriptive labels for their 0/1 codes, cyl keeps its values
m <- transform(
  mtcars,
  am = factor(am, levels = c(0, 1), labels = c("Automatic", "Manual")),
  vs = factor(vs, levels = c(0, 1), labels = c("V-Engine", "Straight Engine")),
  cyl = factor(cyl)
)

## mpg against horse power, with cyl shown as both shape and colour;
## facet_grid(am ~ vs) puts am on the rows and vs on the columns
ggplot(m, mapping = aes(x = hp, y = mpg, color = cyl, shape = cyl)) +
  geom_point(size = 3) +
  facet_grid(am ~ vs) +
  labs(
    x = "Horse Power",
    y = "Miles Per Gallon",
    title = "Automobile Data by Engine Type"
  )


Arranging Plots into Grids

Install the gridExtra package

library(gridExtra)

## Both panels start from the same scatter layer: mpg against weight, drawn
## as blue size-2 points with symbol 17
wt_mpg_points <- ggplot(
  data = mtcars,
  mapping = aes(x = wt, y = mpg, color = factor(cyl))
) +
  geom_point(color = "blue", size = 2, pch = 17)

## p1 adds a linear-model smoother
p1 <- wt_mpg_points +
  geom_smooth(linetype = 2, color = "red", method = "lm") +
  labs(x = "Weight", y = "Miles Per Gallon", title = "Automobile Data")

## p2 adds a loess smoother
p2 <- wt_mpg_points +
  geom_smooth(linetype = 2, color = "red", method = "loess") +
  labs(x = "Weight", y = "Miles Per Gallon", title = "Automobile Data")

## the two plots next to each other (two columns)
grid.arrange(p1, p2, ncol = 2)

## the two plots one above the other (two rows)
grid.arrange(p1, p2, nrow = 2)


Modifying the appearance of ggplot2 graphs

  • Axes
  • Legends
  • Scales
  • Themes

Saving plots to disk

## Create the plot to be saved; ggsave() is called below without a plot
## argument, so it writes the most recent ggplot
ggplot(data = mtcars, mapping = aes(x = wt, y = hp)) +
  geom_point()

## Write it out once as PDF and once as PNG
ggsave(filename = "myplot.pdf")
## Saving 7 x 5 in image
ggsave(filename = "myplot.png")
## Saving 7 x 5 in image